In [1]:
import numpy as np
In [2]:
import pandas as pd
In [3]:
import matplotlib.pyplot as plt
In [4]:
pip install ydata-profiling
Requirement already satisfied: ydata-profiling in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (4.6.5)
Requirement already satisfied: scipy<1.12,>=1.4.1 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from ydata-profiling) (1.11.1)
Requirement already satisfied: pandas!=1.4.0,<3,>1.1 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from ydata-profiling) (2.0.3)
Requirement already satisfied: matplotlib<3.9,>=3.2 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from ydata-profiling) (3.7.2)
Requirement already satisfied: pydantic>=2 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from ydata-profiling) (2.6.3)
Requirement already satisfied: PyYAML<6.1,>=5.0.0 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from ydata-profiling) (6.0)
Requirement already satisfied: jinja2<3.2,>=2.11.1 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from ydata-profiling) (3.1.2)
Requirement already satisfied: visions[type_image_path]==0.7.5 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from ydata-profiling) (0.7.5)
Requirement already satisfied: numpy<1.26,>=1.16.0 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from ydata-profiling) (1.24.3)
Requirement already satisfied: htmlmin==0.1.12 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from ydata-profiling) (0.1.12)
Requirement already satisfied: phik<0.13,>=0.11.1 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from ydata-profiling) (0.12.4)
Requirement already satisfied: requests<3,>=2.24.0 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from ydata-profiling) (2.31.0)
Requirement already satisfied: tqdm<5,>=4.48.2 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from ydata-profiling) (4.65.0)
Requirement already satisfied: seaborn<0.13,>=0.10.1 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from ydata-profiling) (0.12.2)
Requirement already satisfied: multimethod<2,>=1.4 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from ydata-profiling) (1.11.2)
Requirement already satisfied: statsmodels<1,>=0.13.2 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from ydata-profiling) (0.14.0)
Requirement already satisfied: typeguard<5,>=4.1.2 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from ydata-profiling) (4.1.5)
Requirement already satisfied: imagehash==4.3.1 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from ydata-profiling) (4.3.1)
Requirement already satisfied: wordcloud>=1.9.1 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from ydata-profiling) (1.9.3)
Requirement already satisfied: dacite>=1.8 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from ydata-profiling) (1.8.1)
Requirement already satisfied: numba<0.59.0,>=0.56.0 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from ydata-profiling) (0.57.1)
Requirement already satisfied: PyWavelets in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from imagehash==4.3.1->ydata-profiling) (1.4.1)
Requirement already satisfied: pillow in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from imagehash==4.3.1->ydata-profiling) (10.2.0)
Requirement already satisfied: attrs>=19.3.0 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from visions[type_image_path]==0.7.5->ydata-profiling) (22.1.0)
Requirement already satisfied: networkx>=2.4 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from visions[type_image_path]==0.7.5->ydata-profiling) (3.1)
Requirement already satisfied: tangled-up-in-unicode>=0.0.4 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from visions[type_image_path]==0.7.5->ydata-profiling) (0.2.0)
Requirement already satisfied: MarkupSafe>=2.0 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from jinja2<3.2,>=2.11.1->ydata-profiling) (2.1.1)
Requirement already satisfied: contourpy>=1.0.1 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from matplotlib<3.9,>=3.2->ydata-profiling) (1.0.5)
Requirement already satisfied: cycler>=0.10 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from matplotlib<3.9,>=3.2->ydata-profiling) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from matplotlib<3.9,>=3.2->ydata-profiling) (4.25.0)
Requirement already satisfied: kiwisolver>=1.0.1 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from matplotlib<3.9,>=3.2->ydata-profiling) (1.4.4)
Requirement already satisfied: packaging>=20.0 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from matplotlib<3.9,>=3.2->ydata-profiling) (23.1)
Requirement already satisfied: pyparsing<3.1,>=2.3.1 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from matplotlib<3.9,>=3.2->ydata-profiling) (3.0.9)
Requirement already satisfied: python-dateutil>=2.7 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from matplotlib<3.9,>=3.2->ydata-profiling) (2.8.2)
Requirement already satisfied: llvmlite<0.41,>=0.40.0dev0 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from numba<0.59.0,>=0.56.0->ydata-profiling) (0.40.0)
Requirement already satisfied: pytz>=2020.1 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from pandas!=1.4.0,<3,>1.1->ydata-profiling) (2023.3.post1)
Requirement already satisfied: tzdata>=2022.1 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from pandas!=1.4.0,<3,>1.1->ydata-profiling) (2023.3)
Requirement already satisfied: joblib>=0.14.1 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from phik<0.13,>=0.11.1->ydata-profiling) (1.2.0)
Requirement already satisfied: annotated-types>=0.4.0 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from pydantic>=2->ydata-profiling) (0.6.0)
Requirement already satisfied: pydantic-core==2.16.3 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from pydantic>=2->ydata-profiling) (2.16.3)
Requirement already satisfied: typing-extensions>=4.6.1 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from pydantic>=2->ydata-profiling) (4.7.1)
Requirement already satisfied: charset-normalizer<4,>=2 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from requests<3,>=2.24.0->ydata-profiling) (2.0.4)
Requirement already satisfied: idna<4,>=2.5 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from requests<3,>=2.24.0->ydata-profiling) (3.4)
Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from requests<3,>=2.24.0->ydata-profiling) (1.26.16)
Requirement already satisfied: certifi>=2017.4.17 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from requests<3,>=2.24.0->ydata-profiling) (2024.2.2)
Requirement already satisfied: patsy>=0.5.2 in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from statsmodels<1,>=0.13.2->ydata-profiling) (0.5.3)
Requirement already satisfied: six in /Users/yihengwang/anaconda3/lib/python3.11/site-packages (from patsy>=0.5.2->statsmodels<1,>=0.13.2->ydata-profiling) (1.16.0)
Note: you may need to restart the kernel to use updated packages.
In [5]:
from ydata_profiling import ProfileReport
In [6]:
# Load the card-transaction dataset from the working directory.
df = pd.read_csv(
    "card_transdata.csv",
)
In [7]:
# Set up a profiling report over the full transaction table.
profile = ProfileReport(
    df=df,
    title="Pd Profiling Report",
)
In [8]:
# Render the full-dataset profiling report as interactive notebook widgets.
profile.to_widgets()
Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]
Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]
Render widgets:   0%|          | 0/1 [00:00<?, ?it/s]
VBox(children=(Tab(children=(Tab(children=(GridBox(children=(VBox(children=(GridspecLayout(children=(HTML(valu…
In [11]:
# Render the full-dataset profiling report as HTML embedded in the notebook.
profile.to_notebook_iframe()
Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]
In [9]:
# Print the column names, non-null counts, dtypes and memory usage of the dataset.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000000 entries, 0 to 999999
Data columns (total 8 columns):
 #   Column                          Non-Null Count    Dtype  
---  ------                          --------------    -----  
 0   distance_from_home              1000000 non-null  float64
 1   distance_from_last_transaction  1000000 non-null  float64
 2   ratio_to_median_purchase_price  1000000 non-null  float64
 3   repeat_retailer                 1000000 non-null  float64
 4   used_chip                       1000000 non-null  float64
 5   used_pin_number                 1000000 non-null  float64
 6   online_order                    1000000 non-null  float64
 7   fraud                           1000000 non-null  float64
dtypes: float64(8)
memory usage: 61.0 MB
In [10]:
# Count missing values per column.
df.isna().sum()
Out[10]:
distance_from_home                0
distance_from_last_transaction    0
ratio_to_median_purchase_price    0
repeat_retailer                   0
used_chip                         0
used_pin_number                   0
online_order                      0
fraud                             0
dtype: int64
In [27]:
# No null values in any column.
In [12]:
# First, separate the fraudulent transactions from the legitimate ones.
In [12]:
# Rows flagged as fraudulent (fraud == 1).
fraud = df.loc[df["fraud"] == 1]
In [13]:
# Rows flagged as legitimate (fraud == 0).
legit = df.loc[df["fraud"] == 0]
In [14]:
# Summary statistics of distance_from_home for legitimate transactions.
legit["distance_from_home"].describe()
Out[14]:
count    912597.000000
mean         22.832976
std          52.828655
min           0.004874
25%           3.828942
50%           9.673847
75%          24.158057
max        8777.136420
Name: distance_from_home, dtype: float64
In [15]:
# Summary statistics of distance_from_home for fraudulent transactions.
fraud["distance_from_home"].describe()
Out[15]:
count    87403.000000
mean        66.261876
std        134.391608
min          0.025847
25%          4.585729
50%         15.454219
75%        101.110104
max      10632.723672
Name: distance_from_home, dtype: float64
In [16]:
# Per-class mean of every feature, to compare fraud (1.0) with legitimate (0.0).
df.groupby(by="fraud").mean()
Out[16]:
distance_from_home distance_from_last_transaction ratio_to_median_purchase_price repeat_retailer used_chip used_pin_number online_order
fraud
0.0 22.832976 4.301391 1.423642 0.881672 0.359402 0.109944 0.622225
1.0 66.261876 12.712185 6.006323 0.880119 0.256399 0.003123 0.946318
In [25]:
# Set up a profiling report restricted to the fraudulent transactions.
profileFraud = ProfileReport(
    df=fraud,
    title="Pd Fraud Profiling Report",
)
In [ ]:
 
In [21]:
# Render the fraud-only profiling report as HTML embedded in the notebook.
profileFraud.to_notebook_iframe()
Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]
In [23]:
# Set up a profiling report restricted to the legitimate transactions.
# The title previously read "Pd Fraud Profiling Report" (copied from the fraud
# report), which mislabelled this report; it now names the subset it describes.
profileLegit = ProfileReport(legit, title="Pd Legit Profiling Report")
In [24]:
# Render the legitimate-only profiling report as HTML embedded in the notebook.
profileLegit.to_notebook_iframe()
Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]
Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]
Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]
In [26]:
# Pairwise Pearson correlations between all columns, rounded to 4 decimals.
df.corr(method="pearson").round(decimals=4)
Out[26]:
distance_from_home distance_from_last_transaction ratio_to_median_purchase_price repeat_retailer used_chip used_pin_number online_order fraud
distance_from_home 1.0000 0.0002 -0.0014 0.1431 -0.0007 -0.0016 -0.0013 0.1876
distance_from_last_transaction 0.0002 1.0000 0.0010 -0.0009 0.0021 -0.0009 0.0001 0.0919
ratio_to_median_purchase_price -0.0014 0.0010 1.0000 0.0014 0.0006 0.0009 -0.0003 0.4623
repeat_retailer 0.1431 -0.0009 0.0014 1.0000 -0.0013 -0.0004 -0.0005 -0.0014
used_chip -0.0007 0.0021 0.0006 -0.0013 1.0000 -0.0014 -0.0002 -0.0610
used_pin_number -0.0016 -0.0009 0.0009 -0.0004 -0.0014 1.0000 -0.0003 -0.1003
online_order -0.0013 0.0001 -0.0003 -0.0005 -0.0002 -0.0003 1.0000 0.1920
fraud 0.1876 0.0919 0.4623 -0.0014 -0.0610 -0.1003 0.1920 1.0000